@comment{
  Conference paper in the ICCSA 2020 proceedings, Lecture Notes in Computer
  Science volume 12249, pages 421 to 433.
  The fields affiliation, conference-location, conference-year, targetfile and
  urlaccessdate are not standard BibTeX fields; standard styles ignore them, and
  they are kept verbatim as repository metadata.
  Acronyms in the title are brace-protected so that sentence-casing styles do
  not lowercase them.
}
@InProceedings{PinheiroSilvSoarQuil:2020:GrClAn,
  author              = {Pinheiro, Gabriel Augusto Lins Leal and
                         da Silva, Juarez L. F. and
                         Soares, Marinalva D. and
                         Quiles, Marcos Gon{\c{c}}alves},
  affiliation         = {{Instituto Nacional de Pesquisas Espaciais (INPE)} and
                         {Universidade de S{\~a}o Paulo (USP)} and
                         {Universidade Federal de S{\~a}o Paulo (UNIFESP)} and
                         {Universidade Federal de S{\~a}o Paulo (UNIFESP)}},
  title               = {A graph-based clustering analysis of the {QM9} dataset
                         via {SMILES} descriptors},
  booktitle           = {Proceedings of the 20th International Conference on
                         Computational Science and Its Applications ({ICCSA})},
  year                = {2020},
  editor              = {Gervasi, O. and Murgante, B. and Misra, S. and
                         Garau, C. and Blecic, I. and Taniar, D. and
                         Apduhan, B. O. and Rocha, A. M. A. C. and
                         Tarantino, E. and Torre, C. M. and Karaca, Y.},
  pages               = {421--433},
  organization        = {International Conference on Computational Science and
                         Its Applications (ICCSA), 20.},
  publisher           = {Springer},
  series              = {Lecture Notes in Computer Science},
  volume              = {12249},
  keywords            = {Clustering, Graph, Quantum-chemistry},
  abstract            = {Machine learning has become a new hot-topic in
                         Materials Sciences. For instance, several approaches
                         from unsupervised and supervised learning have been
                         applied as surrogate models to study the properties of
                         several classes of materials. Here, we investigate,
                         from a graph-based clustering perspective, the Quantum
                         QM9 dataset. This dataset is one of the most used
                         datasets in this scenario. Our investigation is
                         twofold: 1) understand whether the QM9 samples are
                         organized in clusters, and 2) if the clustering
                         structure might provide us with some insights
                         regarding anomalous molecules, or molecules that
                         jeopardize the accuracy of supervised property
                         prediction methods. Our results show that the QM9 is
                         indeed structured into clusters. These clusters, for
                         instance, might suggest better approaches for
                         splitting the dataset when using cross-correlation
                         approaches in supervised learning. However, regarding
                         our second question, our finds indicate that the
                         clustering structure, obtained via Simplified
                         Molecular Input Line Entry System (SMILES)
                         representation, cannot be used to filter anomalous
                         samples in property prediction. Thus, further
                         investigation regarding this limitation should be
                         conducted in future research.},
  conference-location = {Cagliari, Italy},
  conference-year     = {01-04 July},
  doi                 = {10.1007/978-3-030-58799-4_74},
  url                 = {http://dx.doi.org/10.1007/978-3-030-58799-4_74},
  isbn                = {978-3-030-58798-7},
  issn                = {0302-9743},
  language            = {en},
  targetfile          = {pinheiro_graph.pdf},
  urlaccessdate       = {04 maio 2024}
}